#' ---
#' title: "Perceived Inequality and Populism"
#' description: "Replication Experiment"
#' author: "Lukas F. Stoetzer"
#' date: "June 2025"
#' ---

# Libraries
library(tidyverse)
library(texreg)
library(estimatr)

# Load Data
df <- readRDS("00_Data_Experiment.RDS")

# Combine country
df <- df %>% 
  mutate(country = "Combined (DE, DK, IT)",
         countryname = "All countries combined") %>%
  bind_rows(.,df)

# Estimation function 
reg_type <- function(out = "pyr_typA",treat="group", data = df, scale=F,return_reg=F){
  
  # Number of outcomes
  Nout <- length(out)
  res <- list()
  reg_list <- list()

  
  # Loop over outcomes
  for(i in 1:Nout){
    
    if(scale){
      reg <- lm(as.formula(paste("scale(",out[i],") ~",treat,sep="")), data)
      reg_list[[i]] <- reg
      sel_treat <- grep(treat,rownames(summary(reg)$coefficient))
      res[[i]] <- summary(reg)$coefficient[sel_treat,]
    } else {
      reg <- lm(as.formula(paste(out[i], "~",treat)), data)
      reg_list[[i]] <- reg
      sel_treat <- grep(treat,rownames(summary(reg)$coefficient))
      res[[i]] <- summary(reg)$coefficient[sel_treat,]
    }
    
    
    
  }
  
  if(return_reg){
    return(reg_list)
  }
  
  # bind
  df_res <- bind_rows(res)
  names(df_res) <- c("est","se","tval","pval")
  df_res$treat <- treat
  df_res$var <- out
  rownames(df_res) <- NULL
  return(df_res)
}


# Effect on Perceived Type ===============

  # For all shares
  df_res <- df %>%
    select(pyr_typA:pyr_typE,country, group) %>%
    gather(type,val,-country,-group) %>%
    group_by(type, country) %>%
    do(reg_type(out="val",data=.)) %>%
    filter(var!="(Intercept)") %>%
  mutate(type = case_when(
    type == "pyr_typA" ~ "Type A \n (Small Elite)",
    type == "pyr_typB" ~ "Type B \n (Pyramid)",
    type == "pyr_typC" ~ "Type C \n (No bottom)",
    type == "pyr_typD" ~ "Type D \n (Middle)",
    type == "pyr_typE" ~ "Type E \n (Near Top)",
  ))
  

  # Prepare Shares for Figure
  df_typ <- df %>%
    select(pyr_typA:pyr_typE,country, group) %>%
    gather(type,val,-group,-country) %>%
    group_by(group,country,type) %>%
    summarise("share" = mean(val,na.rm=T),
              "N" = n(),
              "se" = share*(1-share)/sqrt(N)) %>%
    mutate(type = case_when(
      type == "pyr_typA" ~ "Type A \n (Small Elite)",
      type == "pyr_typB" ~ "Type B \n (Pyramid)",
      type == "pyr_typC" ~ "Type C \n (No bottom)",
      type == "pyr_typD" ~ "Type D \n (Middle)",
      type == "pyr_typE" ~ "Type E \n (Near Top)",
    ))

  # Figure
  p <- ggplot(df_typ,aes(x=type, y=share)) +
    theme_minimal() + 
    geom_col(aes(fill=group, group=group),colour="black",width=0.5,    
             position=position_dodge(0.6))  +
    ylim(0,0.5) + ylab("") + xlab("") +
    facet_wrap(~country, ncol = 2) +
    scale_fill_manual(values=c("#ffcc00ff","#ffffff"), name = "Condition") + 
    geom_errorbar(aes(group=group,ymin=share-se, ymax=share+se),
                  position=position_dodge(0.6), width=.25)


  # Prepare Data for difference in shares
  df_typ_diff <- df %>%
    select(pyr_typA:pyr_typE, group,country) %>%
    gather(type,val,-group,-country) %>%
    group_by(group,country,type) %>%
    summarise("share" = mean(val,na.rm=T)) %>%
    spread(group, share) %>%
    mutate(diff = paste(round(100*(wealth-control),1),"%")) %>%
    mutate(type = case_when(
      type == "pyr_typA" ~ "Type A \n (Small Elite)",
      type == "pyr_typB" ~ "Type B \n (Pyramid)",
      type == "pyr_typC" ~ "Type C \n (No bottom)",
      type == "pyr_typD" ~ "Type D \n (Middle)",
      type == "pyr_typE" ~ "Type E \n (Near Top)",
    )) %>%
    mutate(group = "control",
           share = ifelse(control>wealth,control,wealth)) %>%
    left_join(.,df_res) %>%
    mutate(stars = case_when(
      pval < 0.01 ~ "***",
      pval < 0.05 ~ "**",
      pval < 0.1 ~ "*",
      TRUE ~ ""
    ))
    
  
 
  p <- ggplot(filter(df_typ,
                     type=="Type A \n (Small Elite)"),aes(x=type, y=share)) +
    theme_minimal() + 
    geom_col(aes(fill=group, group=group),colour="black",width=0.5,    
             position=position_dodge(0.6))  +
    ylim(0,0.5) + ylab("") + xlab("") +
    facet_wrap(~country, ncol = 4) +
    scale_fill_manual(values=c("#ffcc00ff","#ffffff"), name = "Condition") + 
    geom_errorbar(aes(group=group,ymin=share-se, ymax=share+se),
                  position=position_dodge(0.6), width=.25)
  
  
  p + 
    geom_text(data = filter(df_typ_diff,
                            type=="Type A \n (Small Elite)"), aes(label = diff),nudge_y = 0.03) +
    geom_text(data = filter(df_typ_diff, 
                            type=="Type A \n (Small Elite)"), aes(label = stars),nudge_y = 0.05)
  
  
  # Combined Graph
  ggsave("Figure4.pdf", width=9,height=5) 
  
  
# Effect on Outcome Measures (ITT) ===============
  

  # Estimate countries combined
  itt_cmb <- reg_type(c("popatt_fscores","ptv_popprty","cnjall_pop"), 
                      data = filter(df, country == "Combined (DE, DK, IT)") )

  # Germany
  itt_de <- reg_type(c("popatt_fscores","cnjall_pop","ptv_afd","ptv_linke"), 
                     data = filter(df, country == "DE"))
  
  # Denmark
  itt_dk <- reg_type(c("popatt_fscores","cnjall_pop","ptv_nb","ptv_df"), 
                      data = filter(df, country == "DK"))
  
  # Denmark
  itt_it <- reg_type(c("popatt_fscores","cnjall_pop","ptv_m5s","ptv_lega","ptv_fi","ptv_fdi"), 
                     data = filter(df, country == "IT"))
  
  # Bind together
  df_est <- bind_rows(
    mutate(itt_cmb,country = "Combined (DE, DK, IT)"), 
    mutate(itt_de, country = "DE"),
    mutate(itt_dk, country = "DK"),
    mutate(itt_it, country = "IT")
  ) %>%
    tidyr::separate(var, c("var_type","party"))
  

  df_out <- df_est %>%
    mutate(out_type = 
             case_when(
             var_type == "popatt" ~ "Attitudes",
             var_type == "ptv" ~ "Propensity to Vote",
             var_type == "cnjall" ~ "Conjoint",
             )) %>%
    mutate(out = 
             case_when(
              party == "afd" ~ "AfD ",
              party == "linke" ~ "Linke",
              party == "lega" ~ "Lega",
              party == "df" ~ "DF",
              party == "fdi" ~ "FdI",
              party == "fi" ~ "FI",
              party == "m5s" ~ "M5S",
              party == "nb" ~ "NB",
              party == "fscores" ~ "Populism",
              party == "popprty" ~ "Largest Populist Party",
              party == "pop" ~ "# Votes Cand. \n Populist Slogan",
             )) 
    
  

  ggplot(filter(df_out)) +
    geom_pointrange(aes(x=paste( out_type, "\n", out), 
                        y=est,ymin=est-1.67*se,ymax=est+1.67*se, group=treat),size=1.1) +
    geom_pointrange(aes(x=paste( out_type, "\n", out), 
                        y=est,ymin=est-1.97*se,ymax=est+1.97*se, group=treat),size=.8) +
    geom_hline(yintercept=0,col="red", alpha=0.2) +
    coord_flip() + 
    facet_wrap(~ country, scales="free") +
    ylim(-0.25,0.25)+
    scale_color_grey() +
    ylab("Effect") + xlab("") +
    theme_minimal() +
    theme(legend.position = "none", text = element_text(size=14))
  
  
  ggsave("Figure5.pdf", width=12,height=8)  


# Estimates CACE ======

  
  # Prepare both
  estimation_fun <- function(out, treat, inst, cont=NULL,path_cont=NULL,data = df, return_reg=F){

    # Number of outcomes
    Nout <- length(out)
    res <- list()
    reg_list <- list()
    
    # Loop over outcomes
    for(i in 1:Nout){
    
    # Formula
    form_cace <- formula(paste0(out[i], " ~ ", treat ," | ", inst ))
    form_cace2 <- formula(paste0(out[i], " ~ ", treat,"+", paste(cont,collapse = " + ") ,
                             " | ", inst, "+", paste(cont,collapse = " + ")))
    form_cace3 <- formula(paste0(out[i], " ~ ", treat,"+", paste(c(cont,path_cont),collapse = " + ") ,
                                " | ", inst, "+", paste(c(cont,path_cont),collapse = " + ")))
    form_lm <- formula(paste0(out[i], " ~ ", treat,"+", paste(cont,collapse = " + ")))

    
    # Estimation
    mod_lm <- lm(form_lm, data = data)
    mod_iv <- iv_robust(form_cace, data = data,diagnostics = T)  
    mod_iv2 <- iv_robust(form_cace2, data = data,diagnostics = T)  
    mod_iv3 <- iv_robust(form_cace3, data = data,diagnostics = T)  
 
    
    if(return_reg){
      reg_list[[i]] <- list(mod_lm,mod_iv,mod_iv2,mod_iv3)
    }
    
    # Treatment
    res[[i]] <- bind_rows(
                list("lm"=summary(mod_lm)$coefficients[treat,],
                   "iv"=summary(mod_iv)$coefficients[treat,1:4],
                   "iv2"=summary(mod_iv2)$coefficients[treat,1:4],
                   "iv3"=summary(mod_iv3)$coefficients[treat,1:4]), .id = 'est') %>%
                  mutate(treat = treat, iv = inst, out = out[i],
                         n = c(nobs(mod_lm),mod_iv$nobs,mod_iv2$nobs,mod_iv3$nobs),
                         ftest_firststage = c(0,mod_iv$diagnostic_first_stage_fstatistic[1],
                                              mod_iv2$diagnostic_first_stage_fstatistic[1],mod_iv3$diagnostic_first_stage_fstatistic[1])
                         )
    
    }
    
    if(return_reg){
      return(reg_list)
    }
    
    df_res <- bind_rows(res) %>%
      rename("estimate" = "Estimate",
             "se" = `Std. Error`,
             "tval" = `t value`,
             "pval" = `Pr(>|t|)`)
  }


  
  # Define control variables
  controls <- c("lrscle", "as.factor(income)","as.factor(wealth)",
                "as.factor(gender)", "unemp",#"pyr_typB","pyr_typC","pyr_typD",
                "as.factor(edu)","age")
  path_controls <- c("top_bot","econ_inseq","trust_fscores","natident","ineqbatt_fscores")

  
  # Populism & Conjoint
  res_joint <- df %>%
    group_by(country) %>%
    do(estimation_fun(c("cnjall_pop","popatt_fscores","ptv_popprty"),"pyr_typA","group", data = ., 
                      cont = c(controls),path_cont = path_controls))
  
  # Germany
  res_de <- estimation_fun(c("ptv_afd","ptv_linke"),"pyr_typA","group",
                     data = filter(df, country == "DE"), 
                     cont = controls,path_cont = path_controls) %>% mutate(country = "DE")
  
  # Denmark
  res_dk <- estimation_fun(c("ptv_nb","ptv_df"),"pyr_typA","group",
                     data = filter(df, country == "DK"), 
                     cont = controls,path_cont = path_controls) %>% mutate(country = "DK")
  
  # Denmark
  res_it <- estimation_fun(c("ptv_m5s","ptv_lega","ptv_fi","ptv_fdi"),"pyr_typA","group", 
                     data = filter(df, country == "IT"), 
                     cont = controls,path_cont = path_controls) %>% mutate(country = "IT")
  

  res_cace <- bind_rows(res_joint,res_de,res_dk,res_it) %>%
    mutate(out = 
             case_when(
               out == "ptv_afd" ~ "PTV AfD ",
               out == "ptv_linke" ~ "PTV Linke",
               out == "ptv_df" ~ "PTV DF",
               out == "ptv_fdi" ~ "PTV FdI",
               out == "ptv_fi" ~ "PTV FI",
               out == "ptv_lega" ~ "PTV LEGA",
               out == "ptv_m5s" ~ "PTV M5S",
               out == "ptv_nb" ~ "PTV NB",
               out == "ptv_popprty" ~ "PTV Largest Populist Party",
               out == "popatt_fscores" ~ "Populism Attitudes",
               out == "cnjall_pop" ~ "Conjoint Vote",
             ),
           est2 = factor(est,levels=c("lm","iv","iv2","iv3"),
                            labels = c("OLS pre-treat. contols","IV no controls","IV pre-treat. controls","IV pre-treat. + path controls")
           )) 
  
 
  ggplot(filter(res_cace, est!="iv3", !(out == "PTV Largest Populist Party" &  country != "Combined (DE, DK, IT)")))  +
    geom_point(aes(x=out, 
                        y=estimate,col=est2),
                    size=2, position = position_dodge(0.6)) +
    geom_linerange(aes(x=out, 
                        y=estimate,ymin=estimate-1.67*se,ymax=estimate+1.67*se, col=est2),
                    size=1.4, position = position_dodge(0.6)) +
    geom_linerange(aes(x=out, 
                        y=estimate,ymin=estimate-1.97*se,ymax=estimate+1.97*se, col=est2),
                    size=1, position = position_dodge(0.6)) +
    geom_hline(yintercept=0,col="red", alpha=0.2) +
    coord_flip() + 
    facet_wrap(~ country, scales="free") +
    scale_color_grey() + 
    ylab("CACE of Type A") + xlab("") +
    theme_minimal() +
    theme(text = element_text(size=14), legend.position = "bottom",legend.title = element_blank())

  ggsave("Figure6.pdf", width=9,height=6)  
  